# Copyright (c) 2017-2020 VMware, Inc. or its affiliates
# SPDX-License-Identifier: Apache-2.0

# Both the source and the target installation fall back to GPHOME when no
# explicit location has been provided. Set GPHOME_SOURCE and/or GPHOME_TARGET
# by hand for cross-version testing.
: "${GPHOME_SOURCE:=$GPHOME}"
: "${GPHOME_TARGET:=$GPHOME}"

# log() prints its arguments to stdout.
#
# The arguments are joined into one string ("$*") and written back out one line
# at a time. Each line goes through `read`, so whitespace that is part of the
# current IFS is trimmed from both ends of the line. printf is used instead of
# echo so that a line such as "-n" or "-e" is printed literally rather than
# being interpreted as an echo option.
#
# XXX At one point, log() printed its arguments to the TAP stream, but that
# interfered with the BATS "pretty" format, so we moved to the current solution.
# At some point it would be nice to get the TAP reporting working again, because
# it separates the reason for failure from the potentially massive amount of
# stdout/err generated by a failing test. That may require some conversations
# with bats-core upstream.
log() {
    # Keep the loop variable out of the caller's scope.
    local line

    while read -r line; do
        printf '%s\n' "$line"
    done <<< "$*"
}

# fail() is meant to be called from BATS tests. It prints its arguments with
# log() and then finishes with a non-zero status, which is what is expected to
# fail the current test.
fail() {
    log "$@"
    # `false` is the last command, so its status (1) becomes fail()'s status.
    # NOTE(review): presumably BATS' error handling turns that into a test
    # failure; that mechanism is not visible in this file.
    false
}

# abort() is meant to be called from BATS tests. It passes its arguments,
# prefixed with "fatal: ", to log() and then exits the current process with
# status 1.
abort() {
    local message="fatal: $*"

    log "$message"
    exit 1
}

# skip_if_no_gpdb() skips the current test unless GPHOME_SOURCE, GPHOME_TARGET
# and PGPORT are all non-empty. The variables are checked in that order.
skip_if_no_gpdb() {
    if [ -z "${GPHOME_SOURCE}" ]; then
        skip "this test requires an active GPDB source cluster (set GPHOME or GPHOME_SOURCE)"
    fi

    if [ -z "${GPHOME_TARGET}" ]; then
        skip "this test requires an active GPDB target cluster (set GPHOME or GPHOME_TARGET)"
    fi

    if [ -z "${PGPORT}" ]; then
        skip "this test requires an active GPDB source cluster (set PGPORT)"
    fi
}

# isready hides the difference between installations that ship pg_isready and
# those that do not.
#
# Arguments:
#   $1 - installation directory (defaults to GPHOME_SOURCE)
#   $2 - port to probe (defaults to PGPORT)
# Returns the exit status of whichever probe was run.
isready() {
    local gphome=${1:-$GPHOME_SOURCE}
    local port=${2:-$PGPORT}
    local bindir="$gphome"/bin

    if ! command -v "$bindir"/pg_isready > /dev/null; then
        # 5X does not have pg_isready; a trivial query stands in for it.
        "$bindir"/psql postgres -p "$port" -qc "SELECT 1" &> /dev/null
        return
    fi

    "$bindir"/pg_isready -q -p "$port"
}

# start_source_cluster() does nothing when isready already succeeds. Otherwise
# it sources the source installation's greenplum_path.sh and runs gpstart -a.
# Both happen in a subshell, so the sourced environment stays out of the
# caller.
start_source_cluster() {
    if ! isready; then
        (
            source "$GPHOME_SOURCE"/greenplum_path.sh \
                && "${GPHOME_SOURCE}"/bin/gpstart -a
        )
    fi
}

# stop_any_cluster attempts to stop the cluster defined by
# MASTER_DATA_DIRECTORY. The installation to use is derived from that
# directory's postmaster.opts file, then gpstop -af is run with that
# installation's greenplum_path.sh sourced (in a subshell).
#
# Returns the status of the first step that fails, or 0.
stop_any_cluster() {
    local opts_file="$MASTER_DATA_DIRECTORY"/postmaster.opts
    local gphome

    # Whatever precedes "/bin/postgres" on each line of postmaster.opts is
    # taken as the installation directory.
    gphome=$(
        awk '{ split($0, parts, "/bin/postgres"); print parts[1] }' "$opts_file"
    ) || return

    (
        source "$gphome"/greenplum_path.sh && gpstop -af
    ) || return
}

# Guard meant to run right before a directory is deleted: unless the given path
# matches the pattern of a target cluster's demo master data directory
# (*qddir/demoDataDir.*.-1), abort() is called.
abort_unless_target_master() {
    local dir=$1
    local expected_suffix="*qddir/demoDataDir.*.-1"

    case "$dir" in
        $expected_suffix)
            # Looks like a target master data directory; nothing to do.
            ;;
        *)
            abort "cowardly refusing to delete $dir which does not look like an upgraded demo data directory. Expected suffix ${expected_suffix}"
            ;;
    esac
}

# delete_cluster takes an installation directory and a master data directory,
# calls gpdeletesystem on that cluster, and removes the associated data
# directories.
delete_cluster() {
    local install_dir="$1"
    local master_datadir="$2"

    # Refuse to touch anything that does not look like a target master.
    abort_unless_target_master "$master_datadir"

    __gpdeletesystem "$install_dir" "$master_datadir"

    # XXX: Since gpupgrade archives instead of removing data directories,
    # gpupgrade will fail when copying the master data directory to segments
    # with "file exists". To prevent this, remove the data directories.
    delete_target_datadirs "$master_datadir"
}

# delete_finalized_cluster takes an installation directory and an upgraded
# master data directory and deletes the cluster. It also resets the finalized
# data directories to what they were before upgrade by removing the upgraded
# data directories, and renaming the archive directories to their original name
# (which is the same as their upgraded name).
#
# Arguments:
#   $1 - installation directory passed on to __gpdeletesystem
#   $2 - master data directory of the finalized cluster
#
# abort() is called, before anything is deleted, when the archived master
# directory does not exist or when the upgrade ID cannot be determined. An
# empty ID would make every "*.old" entry look like an archive and would leave
# the rename below with nothing to strip, so it must never reach the loop.
delete_finalized_cluster() {
    local gphome="$1"
    local masterdir="$2"

    # Perform a sanity check before deleting.
    local archive_masterdir
    archive_masterdir=$(archive_dir "$masterdir") \
        || abort "could not determine the archive directory for $masterdir"
    [ -d "$archive_masterdir" ] || abort "cowardly refusing to delete $masterdir. Expected $archive_masterdir to exist."

    local id
    id=$(gpupgrade config show --id) \
        || abort "could not look up the upgrade ID"
    [ -n "$id" ] || abort "cowardly refusing to delete $masterdir. The upgrade ID is empty."

    __gpdeletesystem "$gphome" "$masterdir"

    local datadirs
    datadirs=$(dirname "$(dirname "$masterdir")")

    # Collect every archive before touching the tree. NUL-delimited output
    # keeps paths containing whitespace intact, and finishing the traversal
    # first means find never walks directories that are being moved.
    local -a archives=()
    local archive
    while IFS= read -r -d '' archive; do
        archives+=("$archive")
    done < <(find "${datadirs}" -name "*${id}*.old" -print0)

    if [ "${#archives[@]}" -eq 0 ]; then
        return 0
    fi

    local original
    for archive in "${archives[@]}"; do
        # The following sed matches archived data directories and returns the
        # path of the original directory. For example,
        #   /dbfast_mirror2/demoDataDir.BY6l9U0LfX8.1.old -> /dbfast_mirror2/demoDataDir1
        #   /datadirs/standby.BY6l9U0LfX8.old -> /datadirs/standby
        original=$(sed -E 's/\.'"${id}"'(\.([-0-9]+))?\.old/\2/' <<< "$archive")

        # If nothing was substituted, removing "$original" would remove the
        # archive itself.
        if [ -z "$original" ] || [ "$original" = "$archive" ]; then
            abort "could not derive the original data directory from $archive"
        fi

        rm -rf -- "${original}"
        mv -- "$archive" "$original"
    done
}

# Calls gpdeletesystem on the cluster pointed to by the given master data
# directory.
#
# Arguments:
#   $1 - installation directory providing greenplum_path.sh and
#        bin/gpdeletesystem
#   $2 - master data directory of the cluster to delete
#
# Always returns 0 (see the XXX note below).
__gpdeletesystem() {
    local gphome="$1"
    local masterdir="$2"

    # Look up the master port (fourth line of the postmaster PID file). A
    # missing PID file is tolerated here, like every other failure in this
    # function; the port is then empty.
    local port
    port=$(awk 'NR == 4 { print $0 }' < "$masterdir/postmaster.pid") || true

    local gpdeletesystem="$gphome"/bin/gpdeletesystem

    # XXX gpdeletesystem returns 1 if there are warnings. There are always
    # warnings. So we ignore the exit code...
    #
    # "$gphome" must stay quoted: because the exit code is ignored, a path that
    # word-splits would silently skip the deletion altogether.
    (source "$gphome"/greenplum_path.sh && yes | PGPORT="$port" "$gpdeletesystem" -fd "$masterdir") || true
}

# delete_target_datadirs takes a master data directory, goes two directory
# levels up from it, and removes every entry below that root matching
# */demoDataDir.*.[0-9].
delete_target_datadirs() {
    local masterdir="$1"
    local master_parent=$(dirname "$masterdir")
    local root=$(dirname "$master_parent")

    rm -rf "$root"/*/demoDataDir.*.[0-9]
}

# require_gnu_stat looks for a GNU stat program, trying gstat first and stat
# second. The first one found is stored in the STAT global variable; if its
# --version output does not mention "GNU coreutils", or if neither command
# exists, the current test is skipped.
require_gnu_stat() {
    local candidate found=

    for candidate in gstat stat; do
        if command -v "$candidate" > /dev/null; then
            found=$candidate
            break
        fi
    done

    if [ -n "$found" ]; then
        STAT=$found
    else
        skip "GNU stat is required for this test"
    fi

    # Having a stat binary is not enough; make sure it really is GNU's.
    local version=$($STAT --version || true)
    case $version in
        *"GNU coreutils"*)
            ;;
        *)
            skip "GNU stat is required for this test"
            ;;
    esac
}

# process_is_running prints the number of `ps -ef` lines that contain a
# whole-word match for the basic regular expression given as $1. The exit
# status is grep's: zero when at least one line matched.
process_is_running() {
    local pattern=$1

    ps -ef | grep -c -w -G "$pattern"
}

# Given an original data directory, echoes the temporary data directory that is
# expected for it, which embeds the upgrade ID reported by
# `gpupgrade config show --id`:
#
#    <parent>/standby            -> <parent>/standby.<id>
#    <parent>/demoDataDir<rest>  -> <parent>/demoDataDir.<id>.<rest>
#
# NOTE for devs: this is just for getting the expected data directories, which
# is an implementation detail. If you want the actual location of the new master
# data directory after an initialization, you can just ask the hub with
#
#    gpupgrade config show --target-datadir
#
expected_target_datadir() {
    local dir=$1
    local parent=$(dirname "${dir}")
    local base=$(basename "${dir}")

    local id
    id=$(gpupgrade config show --id)

    # Sanity check.
    [ -n "$parent" ]

    local target
    case "${base}" in
        standby)
            target="${parent}/${base}.${id}"
            ;;
        *)
            # Whatever follows the "demoDataDir" prefix moves behind the ID.
            target="${parent}/demoDataDir.${id}.${base#demoDataDir}"
            ;;
    esac

    echo "${target}"
}

# archive_dir echoes the archive directory expected for an original data
# directory: the output of expected_target_datadir with ".old" appended.
archive_dir() {
    local dir=$1
    local target=$(expected_target_datadir "$dir")

    echo "${target}.old"
}

# is_GPDB5 succeeds when `postgres --gp-version` from the given installation
# directory prints a string starting with "postgres (Greenplum Database) 5.",
# and fails otherwise.
is_GPDB5() {
    local gphome=$1
    local version=$("$gphome"/bin/postgres --gp-version)

    case $version in
        "postgres (Greenplum Database) 5."*)
            return 0
            ;;
        *)
            return 1
            ;;
    esac
}

# query_datadirs prints the data directories of a cluster, one per line, across
# various versions of GPDB.
#
# Arguments:
#   $1 - GPHOME of the installation whose psql is used
#   $2 - PGPORT of the cluster to query
#   $3 - WHERE clause applied to gp_segment_configuration (defaults to "true")
#
# The query is executed through `run`; if $status is non-zero, fail() is called
# with the captured output.
#
# The caller's clause is wrapped in parentheses so that a clause containing OR
# cannot escape the other conditions it is ANDed with.
query_datadirs() {
    local gphome=$1
    local port=$2
    local where_clause=${3:-true}

    local sql="SELECT datadir FROM gp_segment_configuration WHERE (${where_clause}) ORDER BY content, role"

    if is_GPDB5 "$gphome"; then
        # When is_GPDB5 succeeds, read the location from the pg_system
        # filespace entries instead of a datadir column.
        sql="
        SELECT e.fselocation as datadir
        FROM gp_segment_configuration s
        JOIN pg_filespace_entry e ON s.dbid = e.fsedbid
        JOIN pg_filespace f ON e.fsefsoid = f.oid
        WHERE f.fsname = 'pg_system' AND (${where_clause})
        ORDER BY s.content, s.role"
    fi

    run "$gphome"/bin/psql -At -p "$port" postgres -c "$sql"
    [ "$status" -eq 0 ] || fail "$output"

    echo "$output"
}

# get_rsync_pairs maps the data directory of every standby/mirror to the
# corresponding master/primary. The map will later be used to rsync the
# contents of the mirror back to the primary.
#
# Arguments:
#   $1 - GPHOME of the installation whose psql is used
#   $2 - port of the cluster to query (defaults to PGPORT)
#
# Output: a single line of space-separated "<mirror datadir>|<primary datadir>"
# pairs. If the query fails, fail() is called with the captured output.
get_rsync_pairs() {
    local gphome=$1
    local port=${2:-$PGPORT}

    local sql="
    WITH cte AS (select role, content, fselocation datadir FROM pg_filespace_entry INNER JOIN gp_segment_configuration on dbid=fsedbid)
    SELECT f1.datadir, f2.datadir FROM (SELECT * FROM CTE WHERE role='m') f1
    INNER JOIN (SELECT * FROM CTE where role='p') f2 on f1.content=f2.content;"

    run "$gphome"/bin/psql -At -p "$port" postgres -c "$sql"
    [ "$status" -eq 0 ] || fail "$output"

    # Join the result rows with spaces explicitly. Relying on an unquoted
    # expansion for this would depend on the caller's IFS (a caller running
    # with IFS="|" would get the pairs torn apart) and would expose the paths
    # to pathname expansion.
    local newline=$'\n'
    echo "${output//$newline/ }"
}

# setup_restore_cluster gathers the necessary information to later run
# restore_cluster.
#
# Arguments:
#   $1 - the upgrade mode flag; only the exact value "--mode=link" is treated
#        specially
#
# Globals written:
#   RSYNC_PAIRS                 - array of get_rsync_pairs results; only
#                                 assigned when is_GPDB5 succeeds for
#                                 GPHOME_SOURCE
#   MASTER_AND_PRIMARY_DATADIRS - array of data directories with role 'p' in
#                                 link mode, empty otherwise
setup_restore_cluster() {
    local mode=$1

    # The helper output is split into array elements on whitespace on purpose;
    # the helper arguments themselves are quoted so that an installation path
    # containing whitespace still arrives as a single argument.
    if is_GPDB5 "$GPHOME_SOURCE"; then
        RSYNC_PAIRS=($(get_rsync_pairs "$GPHOME_SOURCE"))
    fi

    # In link mode we must bring the datadirs back to a good state, whereas in
    # copy mode we can discard the duplicate copy of the datadir after the
    # test. Specifically, in link mode we undo the rename of pg_control file.
    if [ "$mode" == "--mode=link" ]; then
        MASTER_AND_PRIMARY_DATADIRS=($(query_datadirs "$GPHOME_SOURCE" "$PGPORT" "role = 'p'"))
    else
        MASTER_AND_PRIMARY_DATADIRS=
    fi
}

# restore_cluster brings a cluster back to a known state before upgrade. It
# uses rsync to account for an issue in GPDB5 where the standby and mirrors
# become out of sync and fail to start, thus causing gpstart to return non-zero
# exit code.
#
# Globals read:
#   GPHOME_SOURCE               - installation checked with is_GPDB5
#   RSYNC_PAIRS                 - "<from>|<to>" entries; when is_GPDB5
#                                 succeeds, <from>/ is rsync'ed into <to>/
#   MASTER_AND_PRIMARY_DATADIRS - otherwise, data directories whose
#                                 global/pg_control.old is renamed back to
#                                 global/pg_control
#
# abort() is called if the source cluster is still up or an RSYNC_PAIRS entry
# is malformed.
restore_cluster() {
    # Precondition: the source cluster must be down. rsync'ing over a live
    # cluster makes for some very strange and hard-to-debug failure modes.
    if isready; then
        abort "restore_cluster was invoked on a live source cluster (stop it first)"
    fi

    if is_GPDB5 "$GPHOME_SOURCE"; then
        local -a excludes=(
            --exclude=internal.auto.conf
            --exclude=pg_hba.conf
            --exclude=postmaster.opts
            --exclude=postgresql.auto.conf
            --exclude=gp_dbid
            --exclude=postgresql.conf
            --exclude=backup_label.old
            --exclude=postmaster.pid
            --exclude=recovery.conf
        )

        local pair from to
        for pair in "${RSYNC_PAIRS[@]}"; do
            # Scope the IFS override to this one read, so the "|" separator
            # does not leak into the rest of the shell.
            IFS='|' read -r from to <<< "$pair"

            # An empty side would turn "$to/" into "/".
            if [[ -z $from || -z $to ]]; then
                abort "malformed rsync pair '$pair' (expected '<from>|<to>')"
            fi

            rsync -r "$from/" "$to/" "${excludes[@]}"
        done
    elif [[ -n ${MASTER_AND_PRIMARY_DATADIRS[0]:-} ]]; then
        local datadir
        for datadir in "${MASTER_AND_PRIMARY_DATADIRS[@]}"; do
            mv "${datadir}/global/pg_control.old" "${datadir}/global/pg_control"
        done
    fi
}

# Prints the parts of gp_segment_configuration that must remain the same across
# an upgrade: content, role, hostname, port and data directory, tab-separated,
# one segment per line, ordered by content ID and role.
#
# Arguments:
#   $1 - GPHOME of the installation whose psql is used
#   $2 - port of the cluster to query (defaults to PGPORT)
get_segment_configuration() {
    local gphome=$1
    local port=${2:-$PGPORT}
    local sql

    # Only the query differs between versions; the psql invocation is shared.
    if ! is_GPDB5 "$gphome"; then
        sql="
            SELECT content, role, hostname, port, datadir
            FROM gp_segment_configuration
            ORDER BY content, role
        "
    else
        sql="
            SELECT s.content, s.role, s.hostname, s.port, e.fselocation as datadir
            FROM gp_segment_configuration s
            JOIN pg_filespace_entry e ON s.dbid = e.fsedbid
            JOIN pg_filespace f ON e.fsefsoid = f.oid
            WHERE f.fsname = 'pg_system'
            ORDER BY s.content, s.role
        "
    fi

    "$gphome"/bin/psql -AXtF$'\t' -p "$port" postgres -c "$sql"
}

# backup_source_cluster creates an rsync'd backup of a demo cluster and
# registers teardown steps that restore its original contents.
#
# Arguments:
#   $1 - backup directory; it must not exist yet
#
# abort() is called unless MASTER_DATA_DIRECTORY ends in
# /datadirs/qddir/demoDataDir-1. The directory two levels above it is what gets
# backed up.
#
# Every step is checked explicitly and the function returns that step's status
# on failure, so a later teardown is never registered on top of a step that did
# not succeed. In particular, the restoring rsync (which uses --delete) is only
# registered once the backup itself has completed.
backup_source_cluster() {
    local backup_dir=$1

    if [[ "$MASTER_DATA_DIRECTORY" != *"/datadirs/qddir/demoDataDir-1" ]]; then
        abort "refusing to back up cluster with master '$MASTER_DATA_DIRECTORY'; demo directory layout required"
    fi

    # Don't use -p. It's important that the backup directory not exist so that
    # we know we have control over it. Also, don't assume set -e is enabled: if
    # it's not, registering an rm -rf teardown anyway could be extremely
    # dangerous.
    mkdir "$backup_dir" || return $?
    register_teardown rm -rf "$backup_dir"

    local datadir_root
    datadir_root="$(realpath "$MASTER_DATA_DIRECTORY"/../..)" || return $?

    gpstop -af || return $?
    register_teardown gpstart -a

    # For the same reason as above, never register the restore unless the
    # backup succeeded: restoring with --delete from a partial backup would
    # remove data from the cluster.
    rsync --archive "${datadir_root:?}"/ "${backup_dir:?}"/ || return $?
    register_teardown rsync --archive -I --delete "${backup_dir:?}"/ "${datadir_root:?}"/

    gpstart -a || return $?
    register_teardown stop_any_cluster
}

# archive_state_dir renames the given state directory by appending an
# underscore and the value of BATS_TEST_NAME to its path.
#
# Arguments:
#   $1 - path of the state directory to rename
archive_state_dir() {
    # Declared local so that a caller's own state_dir variable is not
    # overwritten.
    local state_dir=$1

    mv "${state_dir}" "${state_dir}_${BATS_TEST_NAME}"
}
